{
 "metadata": {
  "name": "",
  "signature": "sha256:ad8f3642e5fc17e9a865a2029757cb76dda90a06e6348ed24bf8aad3bb7e1237"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%matplotlib inline \n",
      "import pandas as pd\n",
      "import matplotlib.pyplot as plt\n",
      "import pylab as pl\n",
      "import numpy as np\n",
      "import scipy as sc\n",
      "from twitterSentiment.models import (CompanyStocksSentimentHistory,Company, CompanyFinancials, TwitterText, \n",
      "CompanyAltmanZscore, CompanySentiment, CompanyKeyStats)\n",
      "from django.db.models import Q"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def company_sentiment_calculations(companyid):\n",
      "    \"\"\"Return the dominant sentiment class (0-4) for one company.\n",
      "\n",
      "    Sums each of the five sentiment probability columns over all of the\n",
      "    company's CompanySentiment rows and returns the position of the largest\n",
      "    total: 0 = very negative, 1 = negative, 2 = neutral, 3 = positive,\n",
      "    4 = very positive.\n",
      "\n",
      "    Returns None when the company has no sentiment rows, and -1 when the\n",
      "    lookup or the aggregation fails (the error is printed, not raised).\n",
      "    \"\"\"\n",
      "    probability_columns = ['sentiment_prob_very_negative',\n",
      "                           'sentiment_prob_negative',\n",
      "                           'sentiment_prob_neutral',\n",
      "                           'sentiment_prob_positive',\n",
      "                           'sentiment_prob_very_positive']\n",
      "    try:\n",
      "        # Materialise the queryset once instead of issuing a COUNT and then a SELECT.\n",
      "        rows = list(CompanySentiment.objects.filter(company_id=companyid).values())\n",
      "        if not rows:\n",
      "            return None\n",
      "        frame = pd.DataFrame(rows)\n",
      "        totals = pd.Series([frame[column].astype(float).sum() for column in probability_columns],\n",
      "                           index=list(range(len(probability_columns))))\n",
      "        # A Series has a single axis, so idxmax is called without the original axis=1.\n",
      "        return totals.idxmax()  # index label of the largest summed probability\n",
      "    except Exception as error:\n",
      "        # `sys` is not imported in this notebook, so report the caught exception itself.\n",
      "        # NOTE(review): the -1 sentinel is written to the CSV by build_data_file and\n",
      "        # sentimentscale() maps it to \"negative\" - consider filtering it out downstream.\n",
      "        print (\"sentiment error occured for company:\", companyid, \"error:\", repr(error))\n",
      "        return (-1)\n",
      "\n",
      "    \n",
      "def company_zscore_calculations(companyid):\n",
      "    \"\"\"Return the stored Altman Z-score for one company.\n",
      "\n",
      "    Reads at most one CompanyAltmanZscore row for the company and returns\n",
      "    its 'zscore' value.\n",
      "\n",
      "    Returns None when the company has no row, and -1 when the lookup fails\n",
      "    (the error is printed, not raised).\n",
      "    \"\"\"\n",
      "    try:\n",
      "        matches = list(CompanyAltmanZscore.objects.filter(company_id=companyid).values('zscore')[:1])\n",
      "        if matches:\n",
      "            return matches[0]['zscore']\n",
      "        return None\n",
      "    except Exception as error:\n",
      "        # `sys` is not imported in this notebook, so report the caught exception itself.\n",
      "        # NOTE(review): -1 cannot be told apart from a genuine Z-score of -1 - consider NaN.\n",
      "        print (\"zscore error occured for company:\", companyid, \"error:\", repr(error))\n",
      "        return (-1)\n",
      "\n",
      "def build_data_file():\n",
      "    \"\"\"Create companysentimentszscore.csv, the input file of the analysis.\n",
      "\n",
      "    Loads every Company whose marketCap is set, adds a 'sentiment' column\n",
      "    (company_sentiment_calculations) and an 'altman' column\n",
      "    (company_zscore_calculations), and writes the frame indexed by 'id'.\n",
      "    \"\"\"\n",
      "    # .values() belongs to the queryset; the original closed the parenthesis too late\n",
      "    # and called it on the Q object, which has no such method.\n",
      "    companylist = pd.DataFrame(list(Company.objects.filter(~Q(marketCap=None)).values()))\n",
      "    print(\"total companies listed\", len(companylist))\n",
      "    # Series.apply hands each id to the helper as a scalar; the row-wise\n",
      "    # DataFrame.apply(axis=1) passed a one-element Series instead.\n",
      "    companylist['sentiment'] = companylist['id'].apply(company_sentiment_calculations)\n",
      "    companylist['altman'] = companylist['id'].apply(company_zscore_calculations)\n",
      "    companylist.set_index('id').to_csv(\"companysentimentszscore.csv\")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Altman Z-Score for Sampled Firms (Table 14)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "companyinfo = pd.read_csv(\"companysentimentszscore.csv\")\n",
      "print (\"Total Firms: \", companyinfo.count())\n",
      "altmanzscore = companyinfo.altman\n",
      "# Altman zones: distress <= 1.8, safe >= 3.0, neutral strictly in between.\n",
      "distress = altmanzscore[altmanzscore <= 1.8]\n",
      "safe = altmanzscore[altmanzscore >= 3.0]\n",
      "# Explicit strict comparisons keep the zones disjoint: Series.between() includes both\n",
      "# end points by default, so a score of exactly 1.8 or 3.0 was counted in two zones.\n",
      "neutral = altmanzscore[(altmanzscore > 1.8) & (altmanzscore < 3.0)]\n",
      "print (\"****Distress:\", distress.count(), \",Safe:\",safe.count(), \",Neutral:\",neutral.count())\n",
      "print (\"\\n\\n****Description for Financially-Distressed Firms Altman < 1.8\")\n",
      "print (distress.describe())\n",
      "print (\"\\n\\n****Description for Neutral Firms Altman > 1.8 & < 3.0\")\n",
      "print (neutral.describe())\n",
      "print (\"\\n\\n****Description for Safe Firms >= 3.0\")\n",
      "print (safe.describe())"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Total Firms:  id                5787\n",
        "adrTso            5787\n",
        "date_extracted    5787\n",
        "exchange          5787\n",
        "industry          5787\n",
        "ipoYear           5787\n",
        "lastSale          5787\n",
        "marketCap         5787\n",
        "name              5787\n",
        "sector            5787\n",
        "summaryQuote      5787\n",
        "symbol            5787\n",
        "sentiment         3985\n",
        "altman            3313\n",
        "dtype: int64\n",
        "****Distress: 1032 ,Safe: 1690 ,Neutral: 591\n",
        "\n",
        "\n",
        "****Description for Financially-Distressed Firms Altman < 1.8\n",
        "count    1032.000000\n",
        "mean       -1.961134\n",
        "std         9.818075\n",
        "min      -157.210000\n",
        "25%        -1.091750\n",
        "50%         0.663500\n",
        "75%         1.255500\n",
        "max         1.798000\n",
        "Name: altman, dtype: float64\n",
        "\n",
        "\n",
        "****Description for Neutral Firms Altman > 1.8 & < 3.0\n",
        "count    591.000000\n",
        "mean       2.374411\n",
        "std        0.337121\n",
        "min        1.801000\n",
        "25%        2.092500\n",
        "50%        2.375000\n",
        "75%        2.663500\n",
        "max        2.997000\n",
        "Name: altman, dtype: float64\n",
        "\n",
        "\n",
        "****Description for Safe Firms >= 3.0\n",
        "count    1690.000000\n",
        "mean        9.925705\n",
        "std        20.506765\n",
        "min         3.001000\n",
        "25%         4.009500\n",
        "50%         5.404500\n",
        "75%         8.883500\n",
        "max       454.079000\n",
        "Name: altman, dtype: float64\n"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "Descriptive Statistics of non-Financial Firms "
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Keep firms that have both a sentiment class and an Altman Z-score, leaving out the Finance sector.\n",
      "has_sentiment = companyinfo.sentiment.notnull()\n",
      "has_altman = companyinfo.altman.notnull()\n",
      "outside_finance = companyinfo.sector != 'Finance'\n",
      "companies = companyinfo[has_sentiment & has_altman & outside_finance]\n",
      "print (companies.describe())"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "                id     lastSale     marketCap    sentiment       altman\n",
        "count  2618.000000  2618.000000  2.618000e+03  2618.000000  2618.000000\n",
        "mean   3118.103132    36.625553  9.044559e+09     2.158136     4.643611\n",
        "std    1921.975390    51.212159  3.000794e+10     0.667646    12.866519\n",
        "min       6.000000     0.100000  2.102689e+06     0.000000  -153.814000\n",
        "25%    1490.750000     6.960000  2.736628e+08     2.000000     1.372000\n",
        "50%    2955.500000    22.610000  1.333308e+09     2.000000     3.035500\n",
        "75%    4747.500000    49.590000  5.094740e+09     2.000000     5.489500\n",
        "max    6674.000000  1135.970000  6.744566e+11     4.000000   269.368000\n"
       ]
      }
     ],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def altmanscale(value):\n",
      "    \"\"\"Map an Altman Z-score onto the three-zone scale used in this analysis.\n",
      "\n",
      "    Returns -1 (distress) for value <= 1.8, 0 (neutral) for 1.8 < value < 3.0\n",
      "    and 1 (safe) for value >= 3.0. A missing score (None/NaN) is returned as\n",
      "    NaN instead of being counted as safe.\n",
      "    \"\"\"\n",
      "    if pd.isnull(value):\n",
      "        # NaN fails every comparison, so without this guard it would reach the \"safe\" branch.\n",
      "        return float('nan')\n",
      "    if value <= 1.8:\n",
      "        return -1\n",
      "    if value < 3.0:\n",
      "        return 0\n",
      "    return 1\n",
      "def sentimentscale(value):\n",
      "    \"\"\"Collapse a sentiment class into -1 (negative), 0 (neutral) or 1 (positive).\n",
      "\n",
      "    Values below 2 map to -1, exactly 2 maps to 0 and anything above maps to 1.\n",
      "    A missing value (None/NaN) is returned as NaN instead of being counted as positive.\n",
      "    \"\"\"\n",
      "    if pd.isnull(value):\n",
      "        # NaN fails every comparison, so without this guard it would reach the \"positive\" branch.\n",
      "        return float('nan')\n",
      "    if value < 2:\n",
      "        return -1\n",
      "    if value == 2:\n",
      "        return 0\n",
      "    return 1\n",
      "def sentimentsstr(value):\n",
      "    \"\"\"Name the sentiment class: \"negative\" (< 2), \"neutral\" (== 2) or \"positive\" (> 2).\n",
      "\n",
      "    A missing value (None/NaN) yields None instead of being labelled \"positive\".\n",
      "    \"\"\"\n",
      "    if pd.isnull(value):\n",
      "        # NaN fails every comparison, so without this guard it would reach the \"positive\" branch.\n",
      "        return None\n",
      "    if value < 2:\n",
      "        return \"negative\"\n",
      "    if value == 2:\n",
      "        return \"neutral\"\n",
      "    return \"positive\"\n",
      "    \n",
      "def stockmovementstr(value):\n",
      "    \"\"\"Name a stock direction code: 1 -> \"negative\", 0 -> \"neutral\", anything else -> \"positive\".\n",
      "\n",
      "    NOTE(review): if stockdirection uses the same -1/0/1 convention as the scale\n",
      "    helpers in this cell, this mapping is inverted (-1 comes out as \"positive\") -\n",
      "    confirm the encoding against the CompanyStocksSentimentHistory model.\n",
      "    \"\"\"\n",
      "    return \"negative\" if value == 1 else (\"neutral\" if value == 0 else \"positive\")\n",
      "    \n",
      "\n",
      "##sentiments plots excluding neutral\n",
      "sentiments = companies.groupby('sentiment')['sentiment']\n",
      "print (\"\\n**** Sentiments Breakdown\",sentiments.count())\n",
      "\n",
      "# Store the scales as real columns on an independent copy: assigning to a new\n",
      "# attribute name (companies.altmanscale = ...) does not create a column, so the\n",
      "# values would be missing from any frame derived from `companies`.\n",
      "companies = companies.copy()\n",
      "companies['altmanscale'] = companies.altman.apply(altmanscale)\n",
      "companies['sentimentscale'] = companies.sentiment.apply(sentimentscale)\n",
      "print (\"\\n****Sentiments by Main Categories\",companies.sentimentscale.groupby(companies.sentimentscale).count())\n",
      "print (\"\\n**** Sentiments Breakdown Excluding Neutral\")\n",
      "# Draw the chart directly; wrapping the call in print() only echoed the Axes repr.\n",
      "non_neutral_counts = companies[companies.sentiment != 2].groupby('sentiment')['sentiment'].count()\n",
      "ax = non_neutral_counts.plot(kind=\"bar\")\n",
      "ax.set_ylabel(\"number of firms\");"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "**** Sentiments Breakdown sentiment\n",
        "0              18\n",
        "1             137\n",
        "2            2094\n",
        "3             151\n",
        "4             218\n",
        "Name: sentiment, dtype: int64\n",
        "\n",
        "****Sentiments by Main Categories sentiment\n",
        "-1            155\n",
        " 0           2094\n",
        " 1            369\n",
        "Name: sentiment, dtype: int64\n",
        "\n",
        "**** Sentiments Breakdown Excluding Neutral\n",
        "Axes(0.125,0.125;0.775x0.775)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n"
       ]
      },
      {
       "metadata": {},
       "output_type": "display_data",
       "png": "iVBORw0KGgoAAAANSUhEUgAAAXIAAAEUCAYAAAA2ib1OAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFgRJREFUeJzt3X+MZXd53/H3U6/dFqg0QZT1r20XFUi6qZV1AobUf3jS\nVMhJGkxa4UAUhQWaIqHwI1WrrKlSUyVNIEpoRKvQVDiso4KTVZOmOAViO/K3SpRiF7Lm1+LGpozE\nOngNCCg0hfrH0z/uWe9lZvbOeu737ne+57xf0mjvOffHee6zd54585lzz43MRJLUr7/UugBJ0nIc\n5JLUOQe5JHXOQS5JnXOQS1LnHOSS1LmFgzwiDkTE3RHxqYj4ZES8cVj/1og4FREnhq8fmLvPTRHx\nQETcHxEvWfUTkKSpi0XHkUfEpcClmXlfRDwD+CjwMuBG4GuZ+Y5Ntz8EvA94IXAFcBfw/Mx8YkX1\nS9LkLdwjz8yHM/O+4fLXgU8zG9AAsc1dbgBuy8xHM3MDeBC4pl65kqTNzjsjj4iDwNXAh4dVb4iI\nj0XELRGxNqy7HDg1d7dTnB38kqQVOK9BPsQq/wl407Bn/i7gOcBh4PPAryy4u+cAkKQV2rfTDSLi\nYuB3gP+Ymb8HkJmPzF3/buD2YfEh4MDc3a8c1m1+TIe7JO1CZm6JtXc6aiWAW4CTmfmrc+svm7vZ\njwCfGC6/H3hFRFwSEc8Bngfce45i9vzXzTff3LyGsXzZS/u5l7966ee57LRHfi3w48DHI+LEsO4t\nwCsj4jCz2OSzwOuG4XwyIo4DJ4HHgNfnoq3vcRsbG61LGA17WZf9rKv3fi4c5Jn5x2y/1/7BBff5\nBeAXlqxLknSefGfnAkeOHGldwmjYy7rsZ12993PhG4JWttGInhMXSWoiIsin+sfOqSultC5hNOxl\nXfazrt776SCXpM4ZrUhSJ4xWJGmkHOQL9J6b7SX2si77WVfv/XSQS1LnzMglqRNm5JI0Ug7yBXrP\nzfYSe1mX/ayr9346yCWpc2bkktQJM3JJGikH+QK952Z7ib2sy37W1Xs/HeSS1DkzcknqhBm5JI2U\ng3yB3nOzvcRe1mU/6+q9nw5ySeqcGbkkdcKMXJJGykG+QO+52V5iL+uyn3X13k8HuSR1zoxcUlci\ntkTEe1btOXeujHxf1a1I0gXRw47ghfuBY7SyQO+52V5iL+uyn7WV1gUsxUEuSZ0zI5fUlVlG3sP8\niAuWkbtHLkmdc5AvYA5Zj72sy37WVloXsBQHuSR1zoxcUlfMyM3IJWl0HOQLmEPWYy/rsp+1ldYF\nLMVBLkmdW5iRR8QB4DeBZzMLpf5DZr4zIp4J/DbwN4EN4MbM/Mpwn5uA1wCPA2/MzDu2eVwzckm7\nYka+NSPfaZBfClyamfdFxDOAjwIvA14NfDEzfykifgb4tsw8GhGHgPcBLwSuAO4Cnp+ZT2x6XAe5\npF1xkD/FP3Zm5sOZed9w+evAp5kN6JcCtw43u5XZcAe4AbgtMx/NzA3gQeCaKs+gAXPIeuxlXfaz\nttK6gKWcd0YeEQeBq4F7gP2ZeXq46jSwf7h8OXBq7m6nmA1+SdKKnNdpbIdY5XeAN2Xm1+bPB5yZ\nGRGLfn/Y9rojR45w8OBBANbW1jh8+DDr6+vA2b0Nl8e1fMZeqaf35TP2Sj0Xanl41sD63GUqLLPD\n9bt7vGWebymFY8eOATw5L7ez4xuCIuJi4PeBD2bmrw7r7gfWM/PhiLgMuDszvyMijgJk5tuG230I\nuDkz79n0mGbkknbFjPwpZuQx69gtwMkzQ3zwfuBVw+VXAb83t/4VEXFJRDwHeB5w77LFt7J5z0e7\nZy/rsp+1ldYFLGWnaOVa4MeBj0fEiWHdTcDbgOMR8VqGww8BMvNkRBwHTgKPAa9311uSVstzrUjq\nitGK51qRpNFxkC9gDlmPvazLftZWWhewFAe
5JHXOjFxSV8zIzcglaXQc5AuYQ9ZjL+uyn7WV1gUs\nxUEuSZ0zI5fUFTNyM3JJGh0H+QLmkPXYy7rsZ22ldQFLcZBLUufMyCV1xYzcjFySRsdBvoA5ZD32\nsi77WVtpXcBSHOSS1DkzckldMSM3I5ek0XGQL2AOWY+9rMt+1lZaF7AUB7kkdc6MXFJXzMjNyCVp\ndBzkC5hD1mMv67KftZXWBSzFQS5JnTMjl9QVM3IzckkaHQf5AuaQ9djLuuxnbaV1AUtxkEtS58zI\nJXXFjNyMXJJGx0G+gDlkPfayLvtZW2ldwFIc5JLUOTNySV0xIzcjl6TRcZAvYA5Zj72sy37WVloX\nsBQHuSR1bseMPCJ+A/gh4JHMvGpY91bgHwNfGG72lsz84HDdTcBrgMeBN2bmHds8phm5pF0xI99d\nRv4e4PpN6xJ4R2ZePXydGeKHgB8FDg33+bWIcK9fklZoxyGbmX8EfHmbq7b8VABuAG7LzEczcwN4\nELhmqQobMoesx17WZT9rK60LWMoye8tviIiPRcQtEbE2rLscODV3m1PAFUtsQ5K0g/M6jjwiDgK3\nz2Xkz+ZsPv5zwGWZ+dqI+LfAhzPzvcPt3g18IDN/d9PjmZFrMmaZbh96+L40I9+ake/bzYNl5iNz\nD/xu4PZh8SHgwNxNrxzWbXHkyBEOHjwIwNraGocPH2Z9fR04+2ujyy6PYXnmbuDMchn+3WvL3zdb\n2mP9276fZRfP70IvD0tLPN9SCseOHQN4cl5uZ7d75Jdl5ueHyz8NvDAzf2z4Y+f7mOXiVwB3Ac/d\nvPvdyx55KWXTi0e7NeVermYPsnB2cNRSfw9yFabcz13vkUfEbcB1wLMi4nPAzcB6RBxm1s3PAq8D\nyMyTEXEcOAk8Bry+i4ktSR3zXCvSik05012FKffTc61I0kg5yBfwWN167GVtpXUBI1NaF7AUB7kk\ndc6MXFqxKWe6qzDlfpqRS9JIOcgXMNetx17WVloXMDKldQFLcZBLUufMyKUVm3KmuwpT7mfVc61o\n3DzJk9QXo5UFpp3rZuWvu1fwmFNWWhcwMqV1AUtxkEtS58zItcWUM8hVsJ91TbmfHkcuSSPlIF9g\n2hl5baV1ASNTWhcwMqV1AUtxkEtS58zItcWUM8hVsJ91TbmfZuSSNFIO8gXMyGsqrQsYmdK6gJEp\nrQtYioNckjpnRq4tppxBroL9rGvK/TQjl6SRcpAvYEZeU2ldwMiU1gWMTGldwFIc5JLUOTNybTHl\nDHIV7GddU+6nGbkkjZSDfAEz8ppK6wJGprQuYGRK6wKW4iCXpM6ZkWuLKWeQq2A/65pyP83IJWmk\nHOQLmJHXVFoXMDKldQEjU1oXsBQHuSR1zoxcW0w5g1wF+1nXlPtpRi5JI+UgX8CMvKbSuoCRKa0L\nGJnSuoClOMglqXNm5NpiyhnkKtjPuqbcz11n5BHxGxFxOiI+MbfumRFxZ0T8WUTcERFrc9fdFBEP\nRMT9EfGSek9BkrSd84lW3gNcv2ndUeDOzHw+8IfDMhFxCPhR4NBwn1+LiG7jGzPymkrrAkamtC5g\nZErrApay45DNzD8Cvrxp9UuBW4fLtwIvGy7fANyWmY9m5gbwIHBNnVIlSdvZ7d7y/sw8PVw+Dewf\nLl8OnJq73Sngil1uo7n19fXWJYzIeusCRma9dQEjs966gKUsHXsMf7VclOj38FcJSerWvl3e73RE\nXJqZD0fEZcAjw/qHgANzt7tyWLfFkSNHOHjwIABra2scPnz4yT3gM9l06+Uz6/ZKPRdqeaZwdi+l\nDP8us3wf8OaKjzdX6R7rn/1c7fLZmnd6Pk9leW/2s5TCsWPHAJ6cl9s5r8MPI+IgcHtmXjUs/xLw\npcx8e0QcBdYy8+jwx873McvFrwDuAp67+VjDXg4/LKVMMl5ZzeFdhfq/vk75cLmC/ayp0EM/z3X4\n4Y6DPCJ
uA64DnsUsD/+XwH8BjgN/A9gAbszMrwy3fwvwGuAx4E2Z+QfbPGYXg3yqpnyc7irYz7qm\n3M9dD/JVcJDvbVP+RlkF+1nXlPvpSbN2wePIayqtCxiZ0rqAkSmtC1iKg1ySOme0oi2m/KvrKtjP\nuqbcT6MVSRopB/kCZuQ1ldYFjExpXcDIlNYFLMVBLkmdMyPXFlPOIFfBftY15X6akUvSSDnIFzAj\nr6m0LmBkSusCRqa0LmApDnJJ6pwZubaYcga5Cvazrin304xckkbKQb6AGXlNpXUBI1NaFzAypXUB\nS3GQS1LnzMi1xZQzyFWwn3VNuZ9m5JI0Ug7yBczIayqtCxiZ0rqAkSmtC1iKg1ySOmdGri2mnEGu\ngv2sa8r9NCOXpJFykC9gRl5TaV3AyJTWBYxMaV3AUhzkktQ5M3JtMeUMchXsZ11T7qcZuSSNlIN8\nATPymkrrAkamtC5gZErrApbiIJekzpmRa4spZ5CrYD/rmnI/zcglaaQc5AuYkddUWhcwMqV1ASNT\nWhewFAe5JHXOjFxbTDmDXAX7WdeU+2lGLkkj5SBfwIy8ptK6gJEprQsYmdK6gKU4yCWpc2bk2mLK\nGeQq2M+6ptxPM3JJGqmlBnlEbETExyPiRETcO6x7ZkTcGRF/FhF3RMRanVIvPDPymkrrAkamtC5g\nZErrApay7B55AuuZeXVmXjOsOwrcmZnPB/5wWJYkrchSGXlEfBZ4QWZ+aW7d/cB1mXk6Ii4FSmZ+\nx6b7mZHvYVPOIFfBftY15X6uKiNP4K6I+EhE/OSwbn9mnh4unwb2L7kNSdIC+5a8/7WZ+fmI+OvA\nncPe+JMyMyNi2x9JR44c4eDBgwCsra1x+PBh1tfXgbPZdOvlM+v2Sj0XanmmAOtzl1ly+T7gzRUf\nb67SPdY/+7na5bM17/R8nsry3uxnKYVjx44BPDkvt1Pt8MOIuBn4OvCTzHLzhyPiMuDuXqOVUsqm\nF880rOZX18LZF3otU44CCvazpkIP/TxXtLLrQR4RTwMuysyvRcTTgTuAfwX8feBLmfn2iDgKrGXm\n0U337WKQT9WUM8hVsJ91Tbmf5xrky0Qr+4H/PGsq+4D3ZuYdEfER4HhEvBbYAG5cYhuSpB34zs4F\njFZqKvTwq+sq2M+6ptxP39kpSSPlHrm2mHIGuQr2s64p99M9ckkaKQf5Ap5rpabSuoCRKa0LGJnS\nuoClOMglqXNm5NpiyhnkKtjPuqbcTzNySRopB/kCZuQ1ldYFjExpXcDIlNYFLMVBLkmdMyPXFlPO\nIFfBftY15X6akUvSSDnIFzAjr6m0LmBkSusCRqa0LmApDnJJ6pwZubaYcga5Cvazrin304xckkbK\nQb6AGXlNpXUBI1NaFzAypXUBS3GQS1LnzMi1xZQzyFWwn3VNuZ9m5JI0Ug7yBczIayqtCxiZ0rqA\nkSmtC1iKg1ySOmdGri2mnEGugv2sa8r9NCOXpJFykC9gRl5TaV3AyJTWBYxMaV3AUva1LqCW2a9b\nfejh11dJ/RhNRj7l3Kw2e1mX/axryv00I5ekkXKQL1RaFzAipXUBI1NaFzAypXUBS3GQS1LnzMgv\nuL2fQ9rLuuxnXVPupxm5JI2Ug3yh0rqAESmtCxiZ0rqAkSmtC1iKg1ySOmdGfsHt/RzSXtZlP+ua\ncj/NyCVppFYyyCPi+oi4PyIeiIifWcU2LozSuoARKa0LGJnSuoCRKa0LWEr1QR4RFwH/DrgeOAS8\nMiL+du3tXBj3tS5gROxlXfazrr77uYo98muABzNzIzMfBX4LuGEF27kAvtK6gBGxl3XZz7r67ucq\nBvkVwOfmlk8N6yRJK7CKQd7Dn5PP00brAkZko3UBI7PRuoCR2WhdwFJWcT7yh4ADc8sHmO2Vf4vV\nnD98FY95a/VH7OPc6fayLvtZl/38lu2s4DjHfcD/BL4f+HPgXuCVmfnpq
huSJAEr2CPPzMci4qeA\nPwAuAm5xiEvS6jR5Z6ckqR7f2SlJnRvNhy/XEBH7gSuZHXnzUGaeblySBPjarG1s/TRaASLiauBd\nwBpnj7C5ktm7BF6fmX/aqrZeje0bpRVfm3WNtZ8OciAiPgb8k8y8Z9P6FwO/npnf1aay/oz1G6UV\nX5t1jbWfRiszT9v8HwuQmR+OiKe3KKhjxzj3N8p7gC6/URrytVnXKPvpIJ/5YER8gNk7Aj7H7N0G\nB4CfAD7UsrAOjfIbpSFfm3WNsp9GK4OI+EHgpZw9L8xDwPsz8wPtqupPRLwTeC7bf6P8r8z8qYbl\ndcnXZl1j7KeDXNWN8RtF2ssc5DuIiNdl5q+3rkPazNdmXT330zcE6YKJiNe1rkEaI//YORg+xegG\nzsYBp5jFAV3+hNZ4RMSLgCcy839ExHcy+/StT/varCMifjMzf6LnfjrIgeFzRV/J7NOMzhxxcQC4\nLSJ+OzN/sVlx4/Jo6wJ6ExFvZTa4L46IO4AXAXcDRyPiuzPz51vW15uIuJ3Zm9Tmzy/79yLi24DM\nzJe2qWw5ZuRARDwAHBo+mm5+/SXAycx8bpvKxiUiPpeZB3a+pc6IiE8Ch4FLgNPAlZn51Yj4q8C9\nmXlV0wI7ExEngJPAu4EnmA3024BXAGTmf2tX3e65Rz7zOLNIZWPT+suH63SeIuITC67ef8EKGY/H\nMvMx4LGI+ExmfhUgM/9vRDzRuLYevQB4E/AvgH+emSci4hu9DvAzHOQzbwbuiogHOft5oweA5wEe\n9/zUPJtZFPDlba77kwtcyxh8MyKelpl/AXz3mZURsYY7GU9ZZj4OvCMijgP/JiIeYQRzsPsnUENm\nfigivh24htmeeTI79vkjw96Qzt9/BZ6RmSc2XxERXe/1NHJdZn4DIDPn98D3Aa9qU1L/MvMU8PKI\n+AfAV1vXsywzcknqnMeRS1LnHOSS1DkHuSR1zkGuSYmI74qIH5hb/uHhDWGr3OZ1EfG9q9yGps1B\nrqm5GvjBMwuZeXtmvn3F2/w+4O+ueBuaMI9aUTeGD6Y4zuwQ0YuAnwM+A/wK8Azgi8CRzHw4Igrw\nYWZDdA14LbPTL3wG+CvMDi/9ReBpwPdk5hsi4hjwF8yG/bOB1zA7xO/FwD2Z+eqhjpcAbwX+8vB4\nr87M/xMRG8w+IemHgYuBlwPfBP47s2O+vwC8ITP/eAXt0YS5R66eXM/sg5wPD29N/xDwTuAfZeYL\nmH2U3L8ebpvARZn5ImZv+Lp5OAXDzwK/lZlXZ+bx4Xbz1jLze4GfBt7P7IfEdwJXDbHMs5i9K/D7\nM/N7gI8C/3Rum18Y1r8L+GeZuQH8e+AdwzYd4qrONwSpJx8Hfjki3gb8PrMPdP47zN6VC7O99D+f\nu/3vDv/+KXBwuBx86wmT5iVw+3D5k8DpzPwUQER8aniMA8Ah4E+GbV7Ct75jdX6b/3Bu/bm2KS3N\nQa5uZOYDEXE18EPAzzM7C+CnMvNc+fM3h38f5/xf6/9v+PeJufufWd43PNadmfljFbcpLcVoRd2I\niMuAb2Tme4FfZnZKhWdFxIuH6y+OiEM7PMz/Bv7a/MM+hRKSWe5+bUT8rWGbT4+I5+1wv69t2qZU\nlYNcPbkKuGc4FenPDl8vB94eEfcBJ4BzHeZ3Jgu/GzgUESci4sZhfW5zu82XZysyvwgcYXau+o8x\ni1W+/RzbO3P/24EfGbZ57Y7PUnqKPGpFkjrnHrkkdc5BLkmdc5BLUucc5JLUOQe5JHXOQS5JnXOQ\nS1LnHOSS1Ln/Dyf2KGXZGchBAAAAAElFTkSuQmCC\n",
       "text": [
        "<matplotlib.figure.Figure at 0x7f1cdd419278>"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "heading",
     "level": 4,
     "metadata": {},
     "source": [
      "Research Hypothesis 1"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from scipy.stats.mstats import normaltest\n",
      "# Run the normality test on both variables used in the correlation analysis.\n",
      "print (\"\\n****Test for Normality:\")\n",
      "for column_name in (\"sentiment\", \"altman\"):\n",
      "    print (\"normal test for \" + column_name, normaltest(companies[column_name]))\n",
      "print (\"a P close to zero indidicated that the data is not normal\")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "****Test for Normality:\n",
        "normal test for sentiment (679.21183752063507, 3.2435713873994668e-148)\n",
        "normal test for altman (3404.2039999564081, 0.0)\n",
        "a P close to zero indidicated that the data is not normal\n"
       ]
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "heading",
     "level": 6,
     "metadata": {},
     "source": [
      "Spearman Correlation Analysis"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Rank correlation between the three-level sentiment scale and the three-level Altman scale.\n",
      "overall_correlation = sc.stats.spearmanr(companies.sentimentscale, companies.altmanscale)\n",
      "print (\"Spearman correlation between sentiment and altman scales:\", overall_correlation)\n",
      "print (\"a P>0.05 indicates no statistically significant correlation\")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Spearman correlation between sentiment and altman scales: (-0.030236069863026162, 0.12193910694076453)\n",
        "a P>0.05 indicates no statistically significant correlation\n"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "print (\"correlation analysis by sector\")\n",
      "# (printed label, value of the `sector` column) for every sector reported below.\n",
      "sector_labels = [\n",
      "    (\"For basic industries:\", \"Basic Industries\"),\n",
      "    (\"For capital goods: \", \"Capital Goods\"),\n",
      "    (\"Consumer Durables: \", \"Consumer Durables\"),\n",
      "    (\"Consumer NonDurables: \", \"Consumer Non-Durables\"),\n",
      "    (\"Consumer Services: \", \"Consumer Services\"),\n",
      "    (\"Energy\", \"Energy\"),\n",
      "    (\"Healthcare\", \"Health Care\"),\n",
      "    (\"Miscellaneous\", \"Miscellaneous\"),\n",
      "    (\"Public Utilities\", \"Public Utilities\"),\n",
      "    (\"Technology\", \"Technology\"),\n",
      "    (\"Transportation\", \"Transportation\"),\n",
      "    (\"N/a\", \"n/a\"),\n",
      "]\n",
      "# One loop replaces twelve copy-pasted statements. spearmanr ranks its inputs itself,\n",
      "# so the explicit .rank() the first statement applied is not needed.\n",
      "for sector_label, sector_name in sector_labels:\n",
      "    in_sector = companies.sector == sector_name\n",
      "    print (sector_label, \"(\", companies.sector[in_sector].count(), \" firms)\",\n",
      "           sc.stats.spearmanr(companies.altmanscale[in_sector], companies.sentimentscale[in_sector]))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "correlation analysis by sector\n",
        "For basic industries: ( 233  firms) (-0.057604509123855778, 0.38141460299857033)\n",
        "For capital goods:  ( 253  firms) (0.066972348438911422, 0.28860940659082662)\n",
        "Consumer Durables:  ( 100  firms) (0.055661386982693427, 0.58229018517890074)\n",
        "Consumer NonDurables:  ( 170  firms) (-0.086183593701254654, 0.26378632495570026)\n",
        "Consumer Services:  ( 397  firms) (-0.020205811438471499, 0.68814813451527768)\n",
        "Energy ( 212  firms) (-0.21930040287038272, 0.0013117767964922885)\n",
        "Healthcare ( 427  firms) (-0.04725238360919258, 0.33000480612483729)\n",
        "Miscellaneous ( 102  firms) (-0.048251155119557368, 0.63009898687019883)\n",
        "Public Utilities ( 138  firms) (-0.026246287152440182, 0.75993037837830979)\n",
        "Technology ( 499  firms) (-0.053218159188588389, 0.23536046951436518)\n",
        "Transportation ( 81  firms) (-0.047156968264577326, 0.6759114012086983)\n",
        "N/a"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        " ( 6  firms) (-0.33333333333333331, 0.51851851851851827)\n"
       ]
      }
     ],
     "prompt_number": 8
    },
    {
     "cell_type": "heading",
     "level": 4,
     "metadata": {},
     "source": [
      "Research Hypothesis 2"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "print (\"****correlation analysis for financially distressed firms:\")\n",
      "# Restrict both series to the firms in the distress zone (altmanscale == -1).\n",
      "distressed = companies.altmanscale == -1\n",
      "distressed_correlation = sc.stats.spearmanr(companies.sentimentscale[distressed], companies.altman[distressed])\n",
      "print (distressed_correlation)\n",
      "print (\"p>0.05 indiciates no significance\")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "****correlation analysis for financially distressed firms:\n",
        "(-0.058611960064478115, 0.092492396411079425)\n",
        "p>0.05 indiciates no significance\n"
       ]
      }
     ],
     "prompt_number": 9
    },
    {
     "cell_type": "heading",
     "level": 4,
     "metadata": {},
     "source": [
      "Research Hypothesis 3"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Days that have both a sentiment class and a stock direction.\n",
      "dailydata = pd.DataFrame(list(CompanyStocksSentimentHistory.objects.filter(~Q(sentiment=None) & ~Q(stockdirection=None)).values()))\n",
      "# Store the derived values as columns; assigning to a new attribute name does not create one.\n",
      "dailydata['sentimentsstr'] = dailydata.sentiment.apply(sentimentsstr)\n",
      "dailydata['stockmovementstr'] = dailydata.stockdirection.apply(stockmovementstr)\n",
      "dailydata['sentimentscale'] = dailydata.sentiment.apply(sentimentscale)\n",
      "#print (\"Dataset on Sentiment excluding days with no sentiments or no stock movements:\\n\",\n",
      "#       dailydata.sentimentsstr.groupby([dailydata.date,dailydata.sentimentsstr]).count())\n",
      "#print (\"Dataset on Stock Movement excluding days with no sentiments or no stock movements:\\n\",\n",
      "#       dailydata.stockmovementstr.groupby([dailydata.date,dailydata.stockmovementstr]).count())\n",
      "print (\"total tweets with sentiments and stock value: \", dailydata.tweet_count.sum())\n",
      "print (\"correlation between sentiment and stock direction:\",sc.stats.spearmanr(dailydata.sentiment,  dailydata.stockdirection))\n",
      "# Use the collapsed scale here; the original passed the raw sentiment a second time,\n",
      "# so this line repeated the result of the line above.\n",
      "print (\"correlation between sentiment scale and stock direction:\",sc.stats.spearmanr(dailydata.sentimentscale,  dailydata.stockdirection))\n",
      "\n",
      "\n",
      "# All days that have a sentiment class, whether or not a stock direction was recorded.\n",
      "dailydata = pd.DataFrame(list(CompanyStocksSentimentHistory.objects.filter(~Q(sentiment=None)).values()))\n",
      "print (\"total tweets with sentiments: \", dailydata.tweet_count.sum())\n",
      "print (\"correlation between sentiment and tweet count:\",sc.stats.spearmanr(dailydata.sentiment,  dailydata.tweet_count))\n",
      "dailydata['sentimentscale'] = dailydata.sentiment.apply(sentimentscale)\n",
      "# Neutral is 0 on the collapsed scale (the original compared against 2, which never\n",
      "# occurs there), and the same mask must be applied to both series so they stay aligned.\n",
      "# Rows without a stock direction are dropped because this query does not exclude them.\n",
      "non_neutral = (dailydata.sentimentscale != 0) & dailydata.stockdirection.notnull()\n",
      "print (\"correlation between non-neutral sentiment scale and stock direction\",\n",
      "       sc.stats.spearmanr(dailydata.sentimentscale[non_neutral], dailydata.stockdirection[non_neutral]))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "total tweets with sentiments and stock value:  35726\n",
        "correlation between sentiment and stock direction: (-0.0050123386944458297, 0.67249850944176182)\n",
        "correlation between sentiment scale and stock direction: (-0.0050123386944458297, 0.67249850944176182)\n",
        "total tweets with sentiments: "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        " 66038\n",
        "correlation between sentiment and tweet count: (-0.12071759517158794, 2.3157593866886628e-35)\n",
        "correlation between distress/safe stocks and stock direction (0.014313482237576216, 0.14261916055802118)\n"
       ]
      }
     ],
     "prompt_number": 19
    }
   ],
   "metadata": {}
  }
 ]
}